Notes: Data: 2020-11-03_Switzerland_upto_2018-11-31, Switzerland_2018-12-01_2020-11-03. Pre-processing: After gathering the data, the two datasets were merged and then cleaned of duplicates and empty values, especially in the date fields.
PACKAGES
#CLEANING PROCESS TO JOIN GATHERED DATA FROM FLICKR
#data <- read.csv("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/2020-11-03_Switzerland_upto_2018-11-31.csv", encoding = "UTF-8" )
#saveRDS(data, "//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/Compile_2018-11-03.rds")
#data2 <- read.csv("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/Switzerland_2018-12-01_2020-11-03.csv", encoding = "UTF-8")
#saveRDS(data2, "//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/Update_2020-11-03.rds")
# export dataframes to backup the main tables
#d1 <- readRDS("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/Compile_2018-11-03.rds")
#d2 <- readRDS("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/Update_2020-11-03.rds")
#Join two dataset in a single main dataframe
#raw <- dplyr::bind_rows(d1, d2)
#raw$tags <- gsub(";", " ", raw$tags) # Remove symbol in column tags
#raw$post_title <- gsub(";", " ", raw$post_title) # Remove symbol in column post_title
#raw$post_body <- gsub(";", " ", raw$post_body) # Remove symbol in column post_body
#raw_clean <- subset(raw, select= c("post_guid", "longitude", "latitude", "user_guid", "post_create_date", "post_publish_date", "tags", "post_title", "post_body", "place_guid"))
#raw_clean <- distinct(raw_clean, post_guid, .keep_all = TRUE)
#raw_clean <- filter(raw_clean, longitude >= 0.0001)
#Export the total table for future applications
#write.table(raw_clean,"~/GitHub/Flickr_SwissParks/Join2018-2020.csv", sep=";", dec=".")
# Root folder of the project data (local clone of the repository).
root_folder <- '~/GitHub/Flickr_SwissParks/' # local folder
# NOTE: the original script had `setwd<- '~/GitHub/Flickr_SwissParks/'` here.
# That line did not change the working directory; it only created a character
# variable called `setwd`. It is dropped because input paths are built from
# `root_folder` (see loaddata()) and nothing reads that variable.

# Coordinate reference systems as EPSG init strings
# (for assigning a CRS to data, not for reprojecting).
# Each constant is defined once; the original defined crs_wgs84 and crs_sng twice.
crs_wgs84 <- "+init=epsg:4326" # lat/lng
crs_osm <- "+init=epsg:3857" # OSM projection
crs_bng <- "+init=epsg:27700" # British National Grid, BNG
crs_sng <- "+init=epsg:2056" # Swiss National, CH1903+ / LV95

# Full PROJ definitions, used as reprojection targets.
proj_wgs84 <- '+proj=longlat +datum=WGS84'
proj_osm <- '+proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0.0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs'
proj_bng <- '+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +datum=OSGB36 +units=m +no_defs'
proj_sng <- '+proj=somerc +lat_0=46.95240555555556 +lon_0=7.439583333333333 +k_0=1 +x_0=2600000 +y_0=1200000 +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs'

# Penalty applied before numbers are printed in scientific notation.
options(scipen=7)
# Chart theme: black-and-white base, no border around the panel, grey axis
# ticks and dark-grey 10pt axis text.
# (This variable shares its name with base::t(); it is removed again further down.)
t <- theme_bw() + theme(
  axis.ticks = element_line(colour = 'gray'),
  axis.text = element_text(colour = '#444444', size = 10),
  panel.border = element_blank()
)
# reproject from lat/lng (WGS84) to another CRS
# ---------------------------------------------
# df:       data frame whose first two columns are 'lat' and 'lng' (either
#           order); every column from the third onwards is carried along as
#           attribute data, so at least one such column is required.
# proj_crs: PROJ string of the target coordinate reference system.
# returns:  `df` with two extra columns, 'crs_x' and 'crs_y', holding the
#           reprojected coordinates.
# Reads the file-level constant `crs_wgs84` as the CRS of the input coordinates.
reproject <- function(df, proj_crs) {
  stopifnot(
    is.data.frame(df),
    all(c("lng", "lat") %in% names(df)),
    ncol(df) >= 3
  )
  # select coordinates by name so x (lng) and y (lat) cannot be swapped
  dataset_map_coords <- df[c("lng", "lat")]
  # drop = FALSE keeps a data frame even when only one attribute column exists
  dataset_map_data <- df[, 3:ncol(df), drop = FALSE]
  dataset_map <- SpatialPointsDataFrame(coords = dataset_map_coords, data = dataset_map_data)
  # declare the CRS the coordinates are currently in, then reproject
  proj4string(dataset_map) <- CRS(crs_wgs84)
  dataset_map <- spTransform(dataset_map, CRS(proj_crs))
  # back to a data frame; the coordinate columns keep the names 'lng'/'lat'
  newdf <- as.data.frame(dataset_map)
  names(newdf)[names(newdf) == "lng"] <- "crs_x"
  names(newdf)[names(newdf) == "lat"] <- "crs_y"
  # only the reprojected coordinates are appended to the input
  cbind(df, newdf[, c("crs_x", "crs_y")])
}
# Read a raw data file and standardise the coordinate column names
# ----------------------------------------------------------------
# input_file: file name relative to the file-level `root_folder`.
# returns:    data frame read from the ';'-separated file, with the columns
#             'latitude'/'longitude' renamed to 'lat'/'lng' when present.
# Fields containing exactly "0" are read as NA (na.strings).
loaddata <- function(input_file) {
  csv_path <- paste0(root_folder, input_file)
  tbl <- read.csv(csv_path, sep = ";", na.strings = "0")
  col_names <- names(tbl)
  col_names[col_names == "latitude"] <- "lat"
  col_names[col_names == "longitude"] <- "lng"
  names(tbl) <- col_names
  tbl
}
# Load the merged raw table and make sure the coordinates are numeric
input_file <- 'Join2018-2020.csv'
orig <- loaddata(input_file)
# going through character first means factor columns convert by label, not by code
orig[["lat"]] <- as.numeric(as.character(orig[["lat"]]))
orig[["lng"]] <- as.numeric(as.character(orig[["lng"]]))
# Flag and drop rows without a latitude. The helper column `is_na` stays in the
# table because later steps select columns by position.
orig$is_na <- is.na(orig$lat)
orig <- orig[!orig$is_na, ]
# Restrict the dataset to the study period
# ----------------------------------------
#### a. Only pictures taken from 2004-01-01 up to 2020-11-03 (both inclusive)
df <- orig
# parsed capture date, stored in a new column `post_create`
df$post_create <- as.Date(df$post_create_date)
# both bounds in one filter; rows failing either condition are dropped
reduced_04 <- df %>%
  filter(post_create >= "2004-01-01", post_create <= "2020-11-03")
trimmed <- reduced_04
rm(df)
# Add coordinates reprojected to CH1903+ / LV95
# ---------------------------------------------
# reproject() needs lat and lng as the first two columns, so columns are
# reordered by position (3 = lat, 2 = lng) before the call. Columns 11:12 of
# its result are the appended crs_x / crs_y.
proj_crs <- proj_sng # reproject from WGS84 to Swiss nat.coords
lv95_xy <- reproject(trimmed[, c(3, 2, 1, 4:10)], proj_crs)[, 11:12]
## Recorded output:
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on Bessel 1841 ellipsoid in Proj4
## definition
# Append the new coordinates, then keep column 1, columns 4:10 and the two
# appended columns (13, 14). This drops columns 2-3 and 11-12.
trimmed <- cbind(trimmed, lv95_xy)[, c(1, 4:10, 13, 14)]
rm(lv95_xy)
# from here on 'lat'/'lng' hold the reprojected y/x values
names(trimmed) <- sub("^crs_y$", "lat", names(trimmed))
names(trimmed) <- sub("^crs_x$", "lng", names(trimmed))
df <- trimmed
# Build the spatial point layer of the photos: the first eight columns become
# attributes, lng/lat (already reprojected) become the coordinates.
df_map_data <- df[, seq_len(8)]
df_map_coords <- df[, c("lng", "lat")]
df_map <- SpatialPointsDataFrame(
  coords = df_map_coords,
  data = df_map_data,
  proj4string = CRS(proj_sng)
)
# simple-features version, used for the intersections
df_map_St <- st_as_sf(df_map)
# Polygon layers: park boundaries, land cover and cantonal limits.
# Each shapefile is read, reprojected to the Swiss CRS (proj_sng) and then
# converted to a simple-features object for the intersections.
# Recorded output for all three reads included the warning
# "Discarded datum CH1903+ in Proj4 definition".

# Parks - recorded output: 29 features, 13 fields; Integer64 fields read as
# strings: OBJECTID Rechtsgrun; Z-dimension discarded.
parks <- readOGR(dsn="//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/GIS/Swiss_Parks.shp", use_iconv=TRUE, encoding="UTF-8") %>%
  spTransform(CRS(proj_sng))
parks_St <- st_as_sf(parks)

# Land cover - recorded output: 39308 features, 5 fields; Integer64 fields read
# as strings: OBJECTID Id gridcode.
landcover <- readOGR("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/GIS/LandCover_Parks.shp", use_iconv=TRUE, encoding="UTF-8") %>%
  spTransform(CRS(proj_sng))
landcover_St <- st_as_sf(landcover)

# Cantons - recorded output: 50 features, 20 fields; Integer64 fields read as
# strings: ERSTELL_J REVISION_J HERKUNFT_J KANTONSNUM EINWOHNERZ; Z-dimension
# discarded.
kanton <- readOGR("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/GIS/Swiss_Kanton.shp", use_iconv=TRUE, encoding="UTF-8") %>%
  spTransform(CRS(proj_sng))
kanton_St <- st_as_sf(kanton)
# Attach park, land-cover and canton attributes to the photo points through
# three successive intersections. The polygon layer is always the first
# argument, as in the original calls:
#   1. parks x photos, 2. land cover x result, 3. cantons x result.
# Each step was recorded with the warning "attribute variables are assumed to
# be spatially constant throughout all geometries".
Flickr_park <- parks_St %>%
  st_intersection(df_map_St) %>%
  st_intersection(landcover_St, .) %>%
  st_intersection(kanton_St, .)
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Flickr_Parks_map.png")
# Overview map: cantons as grey background, photo points in yellow, park
# outlines in dark green on top.
ggplot() +
  geom_sf(data = kanton_St, color = "white", fill = "grey") +
  geom_sf(data = Flickr_park, color = "yellow", fill = NA, size = 0.005) +
  geom_sf(data = parks_St, color = "dark green", fill = NA, size = 1) +
  coord_sf() +
  labs(title = "Flickr pictures in Swiss National Parks") +
  theme(plot.title = element_text(hjust = 0.5)) # centred title
# Free the intermediate tables and sp objects that are no longer needed
rm(
  trimmed, df, t,
  df_map, df_map_data, df_map_coords, df_map_St,
  landcover, parks, kanton
)
# Keep the columns needed for the analysis (selected by position in the
# intersection result) and give them analysis-friendly names.
flickr <- Flickr_park[c(39, 40, 41, 19, 23, 28, 38, 43, 44, 45, 47)]
# old name = new name; columns not listed here keep their name
renames <- c(
  post_guid = "photo_id",
  user_guid = "USER",
  post_create_date = "DATE",
  gridcode = "USE",
  NAME = "CANTON",
  SHAPE_Area = "km2",
  tags = "TAGS",
  Name = "PARK",
  post_title = "TITLE",
  post_body = "BODY"
)
names(flickr) <- ifelse(names(flickr) %in% names(renames),
                        renames[names(flickr)],
                        names(flickr))
rm(renames)
# Copy the point coordinates into plain columns (first element = lng, second
# element = lat), then drop the geometry to obtain an ordinary table.
flickr <- st_drop_geometry(
  mutate(flickr,
         lng = unlist(map(geometry, 1)),
         lat = unlist(map(geometry, 2)))
)
# Overview of the photo identifiers
# (recorded output: Length 77697, Class character, Mode character)
summary(flickr[["photo_id"]])
# Switch off dplyr's informational messages about summarise() grouping so they
# do not end up in the report.
options(dplyr.summarise.inform = FALSE)
Only pictures located in natural land-cover categories
# Subset of pictures on natural land cover: rows whose land-cover code (USE)
# is 100, 120, 140 or 160 are excluded. Rows with a missing USE are excluded
# as well, as they were by the original `!=` comparisons.
flickr_nat <- filter(flickr, !is.na(USE), !(USE %in% c(100, 120, 140, 160)))
# Overview of the remaining photo identifiers
# (recorded output: Length 56805, Class character, Mode character)
summary(flickr_nat[["photo_id"]])
# Two working copies of the filtered table for the analyses below
ddbb <- db <- flickr_nat
a. Number of visitors in each Swiss National Park, by land-cover category.
NA values indicate that no user took pictures in that land-cover category of the park (or that the category does not exist in the park).
# Users per park and land-cover category. This uses the full `flickr` table,
# i.e. it includes the land-cover codes excluded from `flickr_nat`.
# Step 1: one row per (park, land cover, user) with that user's picture count.
landcover <- summarise(group_by(flickr, PARK, USE, USER), land_picture = n())
# Step 2: summarise() dropped the last grouping level (USER), so the table is
# still grouped by PARK and USE; counting rows now yields the number of users.
landcover <- summarise(landcover, park_landcover = n())
# Wide layout: one row per park, one column per land-cover code
lu <- pivot_wider(landcover, names_from = USE, values_from = park_landcover)
landcover_park <- as.data.frame(lu)
# Replace the numeric land-cover codes in the column headers with their names.
# Codes 100-160 are the categories excluded from the analysis; 200-420 are the
# natural categories that are kept.
cover_labels <- c(
  "100" = "Building area",
  "120" = "Traffic and transportation surface",
  "140" = "Special settlement areas",
  "160" = "Recreation and green spaces",
  "200" = "Fruit growing, viticulture, horticulture",
  "220" = "Arable and Forage Cultivation",
  "240" = "Alpine farming",
  "300" = "Forest",
  "400" = "Lakes and rivers",
  "420" = "Unproductives"
)
names(landcover_park) <- ifelse(names(landcover_park) %in% names(cover_labels),
                                cover_labels[names(landcover_park)],
                                names(landcover_park))
rm(cover_labels)
# Sort the rows by park name
landcover_park <- arrange(landcover_park, PARK)
#Exporting the table to csv in the root folder
#write.csv(landcover_park,"~/GitHub/Flickr_SwissParks/Results/LandCover.csv", row.names = FALSE)
# Render the table for the report
kable_styling(
  kbl(landcover_park),
  bootstrap_options = "striped",
  full_width = TRUE,
  position = "left",
  latex_options = c("striped", "repeat_header")
)
| PARK | Building area | Traffic and transportation surface | Special settlement areas | Recreation and green spaces | Arable and Forage Cultivation | Alpine farming | Forest | Lakes and rivers | Unproductives | Fruit growing, viticulture, horticulture |
|---|---|---|---|---|---|---|---|---|---|---|
| Biosfera Val Müstair | 53 | 28 | 1 | 6 | 69 | 79 | 77 | 23 | 64 | NA |
| Jurapark Aargau | 124 | 64 | 10 | 17 | 166 | NA | 102 | 43 | 1 | 25 |
| Landschaftspark Binntal | 15 | 10 | 1 | 2 | 52 | 14 | 42 | 17 | 39 | NA |
| Naturpark Beverin | 40 | 52 | 6 | 15 | 128 | 85 | 163 | 78 | 102 | 3 |
| Naturpark Diemtigtal | 7 | 8 | NA | NA | 21 | 41 | 29 | 10 | 19 | NA |
| Naturpark Gantrisch | 66 | 48 | 3 | 9 | 195 | 81 | 139 | 89 | 26 | 9 |
| Naturpark Pfyn-Finges | 113 | 39 | 18 | 24 | 105 | 104 | 80 | 45 | 158 | 38 |
| Naturpark Thal | 29 | 22 | 1 | 1 | 57 | 50 | 87 | 2 | 3 | 6 |
| Parc du Doubs | 93 | 73 | 4 | 8 | 127 | 75 | 162 | 78 | 28 | 4 |
| Parc Ela | 151 | 90 | 16 | 29 | 277 | 158 | 307 | 84 | 162 | 9 |
| Parc Jura vaudois | 173 | 100 | 14 | 36 | 191 | 214 | 282 | 141 | 37 | 13 |
| Parc naturel périurbain du Jorat | NA | 2 | 1 | 1 | 15 | NA | 38 | 3 | NA | NA |
| Parc naturel régional de la Vallée du Trient | 58 | 45 | 42 | 4 | 67 | 81 | 223 | 61 | 220 | 15 |
| Parc naturel régional Gruyère Pays-d’Enhaut | 297 | 190 | 18 | 299 | 636 | 430 | 395 | 102 | 176 | 9 |
| Parc régional Chasseral | 118 | 80 | 13 | 20 | 155 | 172 | 152 | 5 | 5 | 31 |
| Parco Val Calanca | 7 | 4 | 2 | NA | 10 | 11 | 28 | 6 | 11 | NA |
| Regionaler Naturpark Schaffhausen | 22 | 16 | 3 | 3 | 57 | 1 | 39 | 10 | 1 | 12 |
| Schweizerischer Nationalpark | NA | 1 | NA | NA | NA | 1 | 85 | 8 | 60 | NA |
| UNESCO Biosphäre Entlebuch | 25 | 9 | 1 | 4 | 75 | 74 | 68 | 10 | 34 | 1 |
| Wildnispark Zürich Sihlwald | 1 | 7 | NA | NA | NA | NA | 50 | 5 | NA | NA |
# the intermediate land-cover tables are no longer needed
rm(lu, landcover)
b. Area (km2) of national parks
# Area per park, derived from the photo table.
# A park can consist of several polygons with different areas, so first the
# distinct (park, polygon area) pairs that contain at least one photo are
# collected; polygons without any photo never enter the sum.
# NOTE(review): the column is named km2, but the unit of the source field
# (SHAPE_Area) cannot be checked from this script - confirm.
area_portions <- summarise(group_by(ddbb, PARK, km2), photos_km = n())
# summarise() left the table grouped by PARK, so this adds up the polygon
# areas per park, rounded to two decimals.
area_park <- summarise(area_portions, km = round(sum(km2), 2))
# Total area covered by all parks (recorded output: 5848.)
summarise(area_park, area = sum(km))
rm(area_portions)
c. Count of pictures in the parks
# Pictures per park, with each park's share of all pictures in percent
# (two decimals), sorted from the largest to the smallest share.
photos <- summarise(group_by(ddbb, PARK), photo = n())
photos <- mutate(photos, ratio_photos = round((photo / sum(photo)) * 100, 2))
photos <- arrange(photos, desc(ratio_photos))
# Total number of pictures (recorded output: 56805)
summarise(ddbb, count = n())
d. Count of Users in the parks
# Users per park.
# Step 1: one row per (park, user) with that user's picture count.
users_flickr <- summarise(group_by(ddbb, PARK, USER), user_photos = n())
# Step 2: the table is still grouped by PARK, so counting rows gives the number
# of users per park. Then add each park's share of all park-user pairs in
# percent and sort from the largest to the smallest share.
users_flickr <- summarise(users_flickr, user = n())
users_flickr <- mutate(users_flickr, ratio_users = round((user / sum(user)) * 100, 2))
users_flickr <- arrange(users_flickr, desc(ratio_users))
# Sum of the per-park user counts (recorded output: 5743). A user who appears
# in several parks is counted once per park.
summarise(users_flickr, users = sum(user))
e. Statistics of area (km2), users and photos for each Swiss National Park. NOTE: THESE VALUES ONLY CONSIDER THE NATURAL LAND COVER (the dismissed categories are: “Building area”, “Traffic and transportation surface”, “Special settlement areas”, “Recreation and green spaces”)
# Summary table per park: area, photo count/share/density and user
# count/share/density.
# `area_park` is ordered by park name, whereas `photos` and `users_flickr` are
# ordered by descending share. Combining their columns row by row therefore
# paired counts with the wrong parks. Both tables are now aligned to
# `area_park` by park name before the columns are combined.
# NOTE: tables rendered before this fix show the misaligned values (photo and
# user counts strictly decreasing down an alphabetical list of parks).
photos_by_park <- photos[match(area_park$PARK, photos$PARK), ]
users_by_park <- users_flickr[match(area_park$PARK, users_flickr$PARK), ]
general <- data.frame(
  "Parks" = area_park$PARK,
  "Area(km2)" = area_park$km,
  "#Flickr" = photos_by_park$photo,
  "%Flickr" = photos_by_park$ratio_photos,
  "Flickr/km2" = round((photos_by_park$photo / area_park$km), 2),
  "#Users" = users_by_park$user,
  "%Users" = users_by_park$ratio_users,
  "Users/km2" = round((users_by_park$user / area_park$km), 2),
  check.names = FALSE # keep the symbols in the column headers
)
rm(photos_by_park, users_by_park)
general <- general %>% arrange(Parks)
kbl(general) %>%
  kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
| Parks | Area(km2) | #Flickr | %Flickr | Flickr/km2 | #Users | %Users | Users/km2 |
|---|---|---|---|---|---|---|---|
| Biosfera Val Müstair | 198.64 | 13340 | 23.48 | 67.16 | 1134 | 19.75 | 5.71 |
| Jurapark Aargau | 299.05 | 6201 | 10.92 | 20.74 | 642 | 11.18 | 2.15 |
| Landschaftspark Binntal | 164.78 | 6056 | 10.66 | 36.75 | 530 | 9.23 | 3.22 |
| Naturpark Beverin | 515.11 | 4324 | 7.61 | 8.39 | 453 | 7.89 | 0.88 |
| Naturpark Diemtigtal | 135.50 | 4250 | 7.48 | 31.37 | 362 | 6.30 | 2.67 |
| Naturpark Gantrisch | 405.68 | 3015 | 5.31 | 7.43 | 358 | 6.23 | 0.88 |
| Naturpark Pfyn-Finges | 276.51 | 2990 | 5.26 | 10.81 | 357 | 6.22 | 1.29 |
| Naturpark Thal | 139.39 | 2885 | 5.08 | 20.70 | 338 | 5.89 | 2.42 |
| Parc du Doubs | 293.65 | 2786 | 4.90 | 9.49 | 305 | 5.31 | 1.04 |
| Parc Ela | 657.82 | 1769 | 3.11 | 2.69 | 239 | 4.16 | 0.36 |
| Parc Jura vaudois | 530.62 | 1715 | 3.02 | 3.23 | 198 | 3.45 | 0.37 |
| Parc naturel périurbain du Jorat | 9.38 | 1529 | 2.69 | 163.01 | 172 | 2.99 | 18.34 |
| Parc naturel régional de la Vallée du Trient | 206.92 | 1317 | 2.32 | 6.36 | 129 | 2.25 | 0.62 |
| Parc naturel régional Gruyère Pays-d’Enhaut | 632.10 | 1290 | 2.27 | 2.04 | 120 | 2.09 | 0.19 |
| Parc régional Chasseral | 473.30 | 1140 | 2.01 | 2.41 | 107 | 1.86 | 0.23 |
| Parco Val Calanca | 120.49 | 759 | 1.34 | 6.30 | 86 | 1.50 | 0.71 |
| Regionaler Naturpark Schaffhausen | 213.03 | 587 | 1.03 | 2.76 | 71 | 1.24 | 0.33 |
| Schweizerischer Nationalpark | 170.33 | 364 | 0.64 | 2.14 | 51 | 0.89 | 0.30 |
| UNESCO Biosphäre Entlebuch | 394.49 | 326 | 0.57 | 0.83 | 46 | 0.80 | 0.12 |
| Wildnispark Zürich Sihlwald | 10.95 | 162 | 0.29 | 14.79 | 45 | 0.78 | 4.11 |
#write.csv(general,"~/GitHub/Flickr_SwissParks/Results/General.csv", row.names = FALSE)
# Number of pictures each user took in each park
contrib <- summarise(group_by(ddbb, PARK, USER), photo = n())
# Distribution of those counts. Recorded output:
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.
##    1.000    1.000    2.000    9.891    6.000 1629.000
summary(contrib[["photo"]])
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Picture_Park_Taken.png")
# Histogram of log(pictures per user), one panel per park. The bar height is
# each bin's count divided by the sum of the counts.
ggplot(contrib, aes(x = log(photo))) +
  geom_histogram(
    aes(y = (..count..) / sum(..count..)),
    fill = "white",
    colour = "black",
    binwidth = .1
  ) +
  facet_wrap(vars(PARK), labeller = label_wrap_gen(width = 24)) +
  labs(title = 'Distribution of percentage of pictures taken by users in the Parks') +
  theme_bw() +
  theme(
    text = element_text(size = 10),
    strip.text = element_text(face = "italic"),
    axis.text.y = element_text(colour = "grey20", size = 6),
    axis.text.x = element_text(colour = "grey20", size = 6, angle = 90, hjust = 0.5, vjust = 0.5)
  )
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Trend_Pictures.png")
# Q-Q plot of the per-user picture counts against the same counts with
# Gaussian noise added (mean 0, sd 50).
x <- contrib$photo
# The noise vector is now as long as `x`. The original drew a fixed 2500
# values, which R recycled against `x` and reported with the warning "longer
# object length is not a multiple of shorter object length".
y <- x + rnorm(length(x), 0, 50)
qqplot(x, y)
# Parse the capture date and derive the year (as a four-digit string)
ddbb$DATE <- as.Date(ddbb$DATE)
ddbb$YEAR <- format(ddbb$DATE, "%Y")
# Yearly statistics.
# Step 1: one row per (park, year, user) with that user's picture count.
year_photo <- summarise(group_by(ddbb, PARK, YEAR, USER), photo_yearly = n())
# Step 2: `year_photo` is still grouped by PARK and YEAR, so counting its rows
# gives the number of users per park and year.
yearly_counts <- summarise(year_photo, VISITS = n())
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/general_trend.png")
# Users per year as a line chart, one panel per park. `group = 1` joins all
# years of a panel into a single line although YEAR is a character column.
ggplot(yearly_counts, aes(x = YEAR, y = VISITS, group = 1)) +
  geom_line() +
  facet_wrap(vars(PARK), labeller = label_wrap_gen(width = 24)) +
  labs(title = 'Annual distribution of users per Park') +
  theme_bw() +
  theme(
    text = element_text(size = 10),
    strip.text = element_text(face = "italic"),
    axis.text.y = element_text(colour = "grey20", size = 6),
    axis.text.x = element_text(colour = "grey20", size = 6, angle = 90, hjust = 0.5, vjust = 0.5)
  )
rm(year_photo)
Number of users per season in each Swiss National Park
# Month and season of every picture
dates <- ddbb$DATE
# labelled month, kept as a column for reference
ddbb$month<-(month(dates, label=TRUE))
# The season is derived from the month NUMBER. The original compared month
# labels and misspelled November as 'Nove', so every November picture fell
# through to "Spring"; labels also depend on the session locale.
# Season table rendered before this fix: Winter is under-counted and Spring
# over-counted.
# NOTE(review): the season boundaries (May-Jul = Summer, Aug-Oct = Autumn,
# Nov-Jan = Winter, Feb-Apr = Spring) are kept exactly as originally written -
# confirm they are intended.
month_no <- month(dates)
ddbb$SEASON <- ifelse(month_no %in% c(5, 6, 7), "Summer",
               ifelse(month_no %in% c(8, 9, 10), "Autumn",
               ifelse(month_no %in% c(11, 12, 1), "Winter",
                      "Spring"))) # as before, rows with a missing date end up here
rm(month_no)
# Users per park and season.
# Step 1: one row per (park, season, user) with that user's picture count.
season <- summarise(group_by(ddbb, PARK, SEASON, USER), picture_season = n())
# Display settings: factor order of the seasons, bar colours (one per factor
# level, in that order) and the column order of the wide table.
f <- c("Summer", "Spring", "Winter", "Autumn")
color_list <- c("#ffbf00", "#00b04f","#00b0f0", "#ed7c31")
col_order <- c("PARK", "Autumn", "Winter", "Spring", "Summer")
# Step 2: `season` is still grouped by PARK and SEASON, so counting its rows
# gives the number of users; SEASON becomes an ordered factor for plotting.
season_users <- summarise(season, season_users = n())
season_users <- mutate(season_users, SEASON = factor(SEASON, levels = f))
season_users <- arrange(season_users, SEASON)
# Wide layout: one row per park, one column per season, sorted by park name
season_park <- pivot_wider(season_users, names_from = SEASON, values_from = season_users)
season_park <- arrange(season_park[, col_order], PARK)
#write.csv(season_park,"//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Season_Park.csv", row.names = FALSE)
# Render the table for the report
kable_styling(kbl(season_park), bootstrap_options = "striped", full_width = FALSE, position = "left")
| PARK | Autumn | Winter | Spring | Summer |
|---|---|---|---|---|
| Biosfera Val Müstair | 97 | 20 | 21 | 73 |
| Jurapark Aargau | 91 | 48 | 97 | 100 |
| Landschaftspark Binntal | 51 | 17 | 16 | 35 |
| Naturpark Beverin | 145 | 52 | 67 | 141 |
| Naturpark Diemtigtal | 31 | 16 | 22 | 20 |
| Naturpark Gantrisch | 145 | 102 | 138 | 124 |
| Naturpark Pfyn-Finges | 140 | 55 | 91 | 115 |
| Naturpark Thal | 53 | 28 | 49 | 43 |
| Parc du Doubs | 120 | 54 | 96 | 119 |
| Parc Ela | 269 | 122 | 158 | 215 |
| Parc Jura vaudois | 216 | 156 | 200 | 193 |
| Parc naturel périurbain du Jorat | 14 | 12 | 20 | 13 |
| Parc naturel régional de la Vallée du Trient | 213 | 44 | 85 | 178 |
| Parc naturel régional Gruyère Pays-d’Enhaut | 458 | 276 | 281 | 417 |
| Parc régional Chasseral | 145 | 94 | 122 | 138 |
| Parco Val Calanca | 21 | 3 | 15 | 17 |
| Regionaler Naturpark Schaffhausen | 39 | 20 | 25 | 34 |
| Schweizerischer Nationalpark | 72 | 11 | 9 | 35 |
| UNESCO Biosphäre Entlebuch | 63 | 36 | 52 | 53 |
| Wildnispark Zürich Sihlwald | 12 | 11 | 23 | 16 |
# Stacked horizontal bars, normalised to 100 % per park, showing each season's
# share of users.
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/season_park.png")
ggplot(season_users, aes(x = PARK, y = season_users, fill = SEASON, group = SEASON)) +
  geom_bar(
    stat = "identity",
    position = "fill", # stack and rescale every bar to a total of 1
    width = 0.8,
    color = 'white'
  ) +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 50)) + # wrap long park names
  scale_y_continuous(labels = scales::percent) + # proportions shown as percentages
  scale_fill_manual(values = color_list) +
  guides(fill = guide_legend(reverse = TRUE)) + # reverse the legend order
  labs(title = "Proportion of visitors per Season", x = "Parks", y = "Visitors") +
  theme(
    legend.position = 'bottom',
    plot.title = element_text(hjust = 0.5, face = 'bold'),
    axis.text.x = element_text(angle = 90, vjust = 0.15, hjust = 0.15, size = 9),
    axis.text.y = element_text(size = 9),
    strip.background = element_rect(fill = "#17252D", color = "#17252D"),
    strip.text = element_text(size = rel(1), face = "bold", color = "white", margin = margin(5, 0, 5, 0))
  ) +
  ggplot2::coord_flip() # swap the axes to get horizontal bars
rm(season, season_users)
Number of visitors per day of the week in each Swiss National Park
# Day of the week of every picture, as a factor ordered Monday..Sunday.
# The day is taken from the ISO weekday number ("%u": 1 = Monday .. 7 = Sunday)
# and then labelled in English. The original matched the output of weekdays()
# against English names, which yields only NA in a non-English session locale.
ddbb$DAY <- factor(
  as.integer(format(as.Date(ddbb$DATE), "%u")),
  levels = 1:7,
  labels = c("Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday")
)
# Users per park and day of the week.
# Step 1: one row per (park, day, user) with that user's picture count.
photoUser_day <- summarise(group_by(ddbb, PARK, DAY, USER), photo_day = n())
# Step 2: `photoUser_day` is still grouped by PARK and DAY, so counting its
# rows gives the number of users per park on each day of the week.
daily <- summarise(photoUser_day, visit_day = n())
# Wide layout: one row per park, one column per day, sorted by park name
sd <- pivot_wider(daily, names_from = DAY, values_from = visit_day)
daily_park <- arrange(as.data.frame(sd), PARK)
# Render the table for the report
kable_styling(kbl(daily_park), bootstrap_options = "striped", full_width = FALSE, position = "left")
| PARK | Monday | Tuesday | Wednesday | Thursday | Friday | Saturday | Sunday |
|---|---|---|---|---|---|---|---|
| Biosfera Val Müstair | 35 | 37 | 35 | 31 | 33 | 61 | 43 |
| Jurapark Aargau | 55 | 40 | 50 | 42 | 52 | 66 | 86 |
| Landschaftspark Binntal | 23 | 21 | 17 | 17 | 24 | 25 | 33 |
| Naturpark Beverin | 54 | 50 | 49 | 66 | 63 | 102 | 109 |
| Naturpark Diemtigtal | 10 | 9 | 12 | 10 | 16 | 28 | 27 |
| Naturpark Gantrisch | 57 | 63 | 68 | 73 | 81 | 122 | 139 |
| Naturpark Pfyn-Finges | 65 | 60 | 47 | 55 | 77 | 90 | 109 |
| Naturpark Thal | 23 | 16 | 20 | 26 | 16 | 39 | 54 |
| Parc du Doubs | 41 | 38 | 47 | 42 | 59 | 100 | 110 |
| Parc Ela | 118 | 126 | 114 | 118 | 138 | 174 | 184 |
| Parc Jura vaudois | 88 | 88 | 89 | 84 | 126 | 200 | 222 |
| Parc naturel périurbain du Jorat | 10 | 6 | 6 | 2 | 10 | 8 | 18 |
| Parc naturel régional de la Vallée du Trient | 73 | 75 | 67 | 79 | 77 | 115 | 131 |
| Parc naturel régional Gruyère Pays-d’Enhaut | 182 | 160 | 183 | 187 | 218 | 390 | 376 |
| Parc régional Chasseral | 65 | 64 | 62 | 60 | 85 | 113 | 152 |
| Parco Val Calanca | 10 | 10 | 9 | 7 | 9 | 21 | 12 |
| Regionaler Naturpark Schaffhausen | 14 | 12 | 19 | 9 | 15 | 33 | 33 |
| Schweizerischer Nationalpark | 18 | 20 | 17 | 21 | 18 | 26 | 23 |
| UNESCO Biosphäre Entlebuch | 38 | 24 | 27 | 34 | 35 | 46 | 56 |
| Wildnispark Zürich Sihlwald | 6 | 2 | 11 | 4 | 3 | 14 | 23 |
#write.csv(daily_park,"~/GitHub/Flickr_SwissParks/Results/Day-Visits_Park.csv", row.names = FALSE)
Bar plot of visitors per day in the Parks
# Grouped bar chart: number of users per park, one bar per day of the week
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/daily_park.png") #Export the graph as *.png
ggplot(daily, aes(x = PARK, y = visit_day, fill = DAY, group = DAY)) +
  geom_bar(
    stat = "identity",
    position = position_dodge(), # bars side by side instead of stacked
    width = 0.8,
    color = 'white'
  ) +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 24)) + # wrap long park names
  # one colour per day, in factor order
  scale_fill_manual(values = c("#2980b9", "#5dade2", "#76d7c4", "#2ecc71", "#27ae60", "#f39c12", "#d35400")) +
  labs(x = 'Swiss National Parks', y = 'Number of visitors', title = 'Daily number of visitors') +
  theme(
    legend.position = 'bottom',
    plot.title = element_text(hjust = 0.5, face = 'bold'),
    axis.text.x = element_text(angle = 90, vjust = 0.15, hjust = 0.15, size = 9),
    axis.text.y = element_text(size = 9),
    strip.background = element_rect(fill = "#17252D", color = "#17252D"),
    strip.text = element_text(size = rel(0.5), face = "bold", color = "white", margin = margin(5, 0, 5, 0))
  )
Mosaic plot of visitors per day in the Parks (Two test)
# Mosaic plot of park by day of the week, built from the per-user table
# (one row per park, day and user).
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/daily_park_Mosaic1.png")
ggplot(photoUser_day) +
  geom_mosaic(aes(x = product(DAY, PARK), fill = DAY), offset = 0.02) +
  labs(title = " Mosaic plot of visits per Day in Parks ", y = "DAYS") +
  scale_fill_manual(values = c("#27ae60", "#2ecc71", "#76d7c4", "#5dade2", "#2980b9", "#f39c12", "#d35400")) +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 10),
    axis.text.y = element_text(size = 10),
    axis.ticks.y = element_blank(),
    legend.position = "none"
  )
## Recorded output:
## Warning: `unite_()` was deprecated in tidyr 1.2.0.
## Please use `unite()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
# Point chart: number of users per park, one coloured point per day of the week
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/daily_park_Mosaic2.png")
ggplot(daily, aes(x = PARK, y = visit_day, color = DAY)) +
  geom_point() +
  # the x axis shows the parks; the original labelled it "Days"
  labs(x = "Parks", y = "Visits") +
  scale_color_manual(name = "Days:", values=c("#27ae60","#2ecc71","#76d7c4", "#5dade2", "#2980b9", "#f39c12","#d35400")) +
  theme(legend.title = element_text(size = 14, face = 2),
        legend.position = 'bottom',
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 8))+
  ggtitle('Daily total number of visitors in Parks')
Analysis of users according to the canton where the pictures were taken, the parks visited, and the time span between the first and the last picture
canton<- ddbb %>%
group_by(CANTON, USER)%>%
summarise(canton_user = n())
co<- pivot_wider(canton, names_from = CANTON, values_from = canton_user)
canton_users <- as.data.frame(co)
canton_users[is.na(canton_users)] = 0
canton_users <- canton_users %>% mutate_if(is.numeric, ~1 * (. > 0))
canton_users$Total_Canton = rowSums(canton_users[,c(2:13)])
canton_users <- canton_users[order(canton_users$Total_Canton, decreasing = FALSE),]
#write.csv(canton_users,"~/GitHub/Flickr_SwissParks/Results/Canton_Users.csv", row.names = FALSE)
# Distribution of the number of cantons per user; the dashed blue line marks
# the mean of Total_Canton.
ggplot(canton_users, aes(x = Total_Canton)) +
  geom_histogram(binwidth = 1, colour = "black", fill = "white") +
  geom_vline(
    aes(xintercept = mean(Total_Canton)),
    linetype = "dashed",
    color = "blue",
    size = 1
  ) +
  scale_x_continuous(breaks = seq(0, 12, 1)) +
  scale_y_continuous(breaks = seq(0, 6000, 200)) +
  labs(
    title = "Histogram of Cantons by users ",
    x = "Number of visited Cantons",
    y = "Count of users"
  )
#Heatmap
# Heatmap of the user x canton presence table.
coul <- colorRampPalette(c("beige", "green"))(5)
# Move the first column (the user identifier) into the row names so that only
# canton columns remain in the matrix.
rownames(canton_users) <- canton_users[[1]]
canton_users[[1]] <- NULL
# Drop the per-user total by name: the former positional `[, 13]` removed a
# real canton column whenever the number of cantons was not exactly 12.
canton_users$Total_Canton <- NULL
data <- as.matrix(canton_users)
heatmap(
  data,
  Colv = NA,
  Rowv = NA,
  scale = "column",
  col = coul,
  main = "Heatmap of User per Canton"
)
# Release the intermediates of the canton analysis.
rm(canton)
rm(co)
rm(coul)
# Build a user x park presence table: 1 when the user has at least one row of
# ddbb in that park, 0 otherwise, plus the number of parks per user.
park <- ddbb %>%
  group_by(PARK, USER) %>%
  summarise(park_user = n(), .groups = "drop")
po <- pivot_wider(park, names_from = PARK, values_from = park_user)
park_users <- as.data.frame(po)
# User/park pairs that never occur come out of pivot_wider() as NA.
park_users[is.na(park_users)] <- 0
# Every column except USER is a park. Deriving the set from the data replaces
# the hard-coded 2:21 range, which silently assumed 20 parks.
park_cols <- setdiff(names(park_users), "USER")
park_users <- park_users %>%
  mutate(across(all_of(park_cols), ~ 1 * (.x > 0)))
park_users$Total_Park <- rowSums(park_users[, park_cols, drop = FALSE])
# Ascending by number of parks visited.
park_users <- park_users[order(park_users$Total_Park, decreasing = FALSE), ]
#write.csv(park_users,"~/GitHub/Flickr_SwissParks/Results/Park_Users.csv", row.names = FALSE)
# Distribution of the number of parks per user; the dashed blue line marks the
# mean of Total_Park.
ggplot(park_users, aes(x = Total_Park)) +
  geom_histogram(binwidth = 1, colour = "black", fill = "white") +
  geom_vline(
    aes(xintercept = mean(Total_Park)),
    linetype = "dashed",
    color = "blue",
    size = 1
  ) +
  scale_x_continuous(breaks = seq(0, 20, 1)) +
  scale_y_continuous(breaks = seq(0, 6000, 200)) +
  labs(
    title = "Histogram of Parks by users ",
    x = "Number of visited Parks",
    y = "Count of users"
  )
#Heatmap
# Heatmap of the user x park presence table.
coul <- colorRampPalette(c("beige", "blue"))(10)
# Move the first column (the user identifier) into the row names so that only
# park columns remain in the matrix.
rownames(park_users) <- park_users[[1]]
park_users[[1]] <- NULL
# Drop the per-user total by name: the former positional `[, 21]` removed a
# real park column whenever the number of parks was not exactly 20.
park_users$Total_Park <- NULL
park_users <- as.matrix(park_users)
heatmap(
  park_users,
  Colv = NA,
  Rowv = NA,
  scale = "column",
  col = coul,
  main = "Heatmap of User per Park"
)
# Release the intermediates of the park analysis.
rm(park)
rm(po)
rm(coul)
# Time span between each user's first and last picture.

# Total number of rows (photos) per user.
user <- ddbb %>%
  group_by(USER) %>%
  summarise(photos = n())
user <- as.data.frame(user)

# Photos per user and date; the result stays grouped by USER.
dt <- ddbb %>%
  group_by(USER, DATE) %>%
  summarise(photos = n(), .groups = "drop_last") %>%
  mutate(DATE = as.Date(DATE))

# Earliest date per user.
dt1 <- dt %>%
  group_by(USER) %>%
  arrange(DATE) %>%
  slice(1L)
first <- as.data.frame(dt1)
names(first)[names(first) == "DATE"] <- "start"

# Latest date per user.
dt2 <- dt %>%
  group_by(USER) %>%
  arrange(desc(DATE)) %>%
  slice(1L)
# Fix: use dt2 (one row per user) rather than dt (one row per user and date).
# With dt, the merge below produced one row per active day, so users were
# counted several times and most "end" dates were not the last date.
last <- as.data.frame(dt2)
names(last)[names(last) == "DATE"] <- "end"

dt_final <- merge(last, first, by = "USER")
dur <- dt_final %>%
  mutate(
    days = end - start,
    # Explicit units: with the default "auto", difftime() picks the unit from
    # the smallest difference, so the value was not reliably days / 365.25.
    # The column holds fractional years; the name `seconds` is kept so that
    # downstream code and exports keep working.
    seconds = as.numeric(difftime(end, start, units = "days")) / 365.25,
    years = round(interval(start, end) / years(1))
  )

time_user <- merge(user, dur, by = "USER", all = TRUE)
# Keep only the overall photo count; drop the per-day counts from the merge.
time_user <- subset(time_user, select = -c(photos.x, photos.y))
time_user <- time_user[order(time_user$seconds, decreasing = TRUE), ]
# Distribution of the rounded time span in years; the dashed blue line marks
# the mean of `years`.
ggplot(time_user, aes(x = years)) +
  geom_histogram(binwidth = 1, colour = "black", fill = "white") +
  geom_vline(
    aes(xintercept = mean(years)),
    linetype = "dashed",
    color = "blue",
    size = 1
  ) +
  scale_x_continuous(breaks = seq(0, 20, 1)) +
  scale_y_continuous(breaks = seq(0, 6000, 500)) +
  labs(
    title = "Histogram of time between first and last picture by users ",
    x = "time span in years",
    y = "Count of users"
  )
#write.csv(time_user,"~/GitHub/Flickr_SwissParks/Results/Time_Users.csv", row.names = FALSE)
# Remove the intermediates of the time-span analysis from the workspace.
rm(user, dt, dt1, dt2, last, dur)
# Fixed seed for the code that follows.
set.seed(42)

# One row per record of ddbb, with the column names tm::DataframeSource()
# looks up by name: `doc_id` (here the park name) and `text` (the tags).
# `origin` and `author` are carried along as extra columns.
base <- data.frame(
  origin = ddbb$photo_id,
  author = ddbb$USER,
  doc_id = ddbb$PARK,
  text = ddbb$TAGS
)

base$text <- gsub("[[:digit:]]", " ", base$text) # Remove numbers
# Fix: a character class removes each symbol wherever it occurs. The former
# pattern "(\\-¨“„)" only matched the four symbols as one literal sequence.
base$text <- gsub("[-¨“„]", " ", base$text) # Remove extra symbols
# Remove punctuation except "." (negative lookahead needs perl = TRUE).
base$text <- gsub("(?!\\.)[[:punct:]]", " ", base$text, perl = TRUE)
# Collapse whitespace last, because the substitutions above insert spaces.
base$text <- gsub("\\s+", " ", str_trim(base$text))

base_source <- DataframeSource(base)
base_corpus <- VCorpus(base_source)
base_corpus_clean <- tm_map(base_corpus, removeWords, stopwords("english"))
base_corpus_root <- tm_map(base_corpus_clean, stemDocument) # Stem words to their root
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Parks.png")
# Word cloud over the stemmed corpus of all parks: terms need a frequency of
# at least 50, at most 100 terms are drawn, most frequent first.
wordcloud(
  base_corpus_root,
  colors = brewer.pal(8, "Dark2"),
  random.color = FALSE,
  random.order = FALSE,
  max.words = 100,
  min.freq = 50
)
#myStopwords <- setdiff(myStopwords, c("d", "e"))
#text_corpus_clean <- tm_map(base_corpus_clean, removeWords, myStopwords)
# Per-park word clouds. The same five-step pipeline was previously copy-pasted
# for every park; it now lives in two helpers. The call order is unchanged, so
# the random number stream consumed by the clouds is the same as before.

# Build the stemmed, stop-word-free corpus for one park.
#   tags:      data frame with `doc_id` (park name) and `text` columns
#   park_name: exact park name to keep
# Returns the tm corpus for that park.
build_park_corpus <- function(tags, park_name) {
  park_tags <- tags %>% filter(doc_id == park_name)
  corpus <- VCorpus(DataframeSource(park_tags))
  corpus <- tm_map(corpus, removeWords, stopwords("english"))
  tm_map(corpus, stemDocument) # Stem words to their root
}

# Draw the word cloud for one corpus with the settings shared by all parks.
# Returns whatever wordcloud() returns.
plot_park_wordcloud <- function(corpus) {
  wordcloud(
    corpus,
    min.freq = 50,
    max.words = 100,
    random.order = FALSE,
    random.color = FALSE,
    colors = brewer.pal(8, "Dark2")
  )
}

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Nationalpark.png")
# Previously stored in `sub_jurapark`/`jurapark` by copy-paste and then
# overwritten by the Jurapark Aargau block below.
sub_nationalpark <- build_park_corpus(base, "Schweizerischer Nationalpark")
nationalpark <- plot_park_wordcloud(sub_nationalpark)

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Jurapark.png")
sub_jurapark <- build_park_corpus(base, "Jurapark Aargau")
jurapark <- plot_park_wordcloud(sub_jurapark)

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Gruyere.png")
sub_gruyere <- build_park_corpus(base, "Parc naturel régional Gruyère Pays-d’Enhaut")
gruyere <- plot_park_wordcloud(sub_gruyere)

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Mustair.png")
sub_mustair <- build_park_corpus(base, "Biosfera Val Müstair")
mustair <- plot_park_wordcloud(sub_mustair)

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Sihlwald.png")
sub_sihlwald <- build_park_corpus(base, "Wildnispark Zürich Sihlwald")
sihlwald <- plot_park_wordcloud(sub_sihlwald)

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Chasseral.png")
sub_chasseral <- build_park_corpus(base, "Parc régional Chasseral")
chasseral <- plot_park_wordcloud(sub_chasseral)

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Vaudois.png")
sub_vaudois <- build_park_corpus(base, "Parc Jura vaudois")
vaudois <- plot_park_wordcloud(sub_vaudois)

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Ela.png")
sub_ela <- build_park_corpus(base, "Parc Ela")
ela <- plot_park_wordcloud(sub_ela)

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Gantrisch.png")
sub_gantrisch <- build_park_corpus(base, "Naturpark Gantrisch")
gantrisch <- plot_park_wordcloud(sub_gantrisch)

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Beverin.png")
sub_beverin <- build_park_corpus(base, "Naturpark Beverin")
beverin <- plot_park_wordcloud(sub_beverin)

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Schaffhausen.png")
sub_schaffhausen <- build_park_corpus(base, "Regionaler Naturpark Schaffhausen")
schaffhausen <- plot_park_wordcloud(sub_schaffhausen)

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Trient.png")
sub_trient <- build_park_corpus(base, "Parc naturel régional de la Vallée du Trient")
trient <- plot_park_wordcloud(sub_trient)

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Doubs.png")
sub_doubs <- build_park_corpus(base, "Parc du Doubs")
doubs <- plot_park_wordcloud(sub_doubs)

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Thal.png")
sub_thal <- build_park_corpus(base, "Naturpark Thal")
thal <- plot_park_wordcloud(sub_thal)

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Pfyn-Finges.png")
sub_pfyn_finges <- build_park_corpus(base, "Naturpark Pfyn-Finges")
pfyn_finges <- plot_park_wordcloud(sub_pfyn_finges)

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Diemigtal.png")
sub_diemtigtal <- build_park_corpus(base, "Naturpark Diemtigtal")
diemtigtal <- plot_park_wordcloud(sub_diemtigtal)

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Entlebuch.png")
sub_entlebuch <- build_park_corpus(base, "UNESCO Biosphäre Entlebuch")
entlebuch <- plot_park_wordcloud(sub_entlebuch)

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Calanca.png")
sub_calanca <- build_park_corpus(base, "Parco Val Calanca")
calanca <- plot_park_wordcloud(sub_calanca)

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Binntal.png")
sub_binntal <- build_park_corpus(base, "Landschaftspark Binntal")
binntal <- plot_park_wordcloud(sub_binntal)

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Jorat.png")
sub_jorat <- build_park_corpus(base, "Parc naturel périurbain du Jorat")
jorat <- plot_park_wordcloud(sub_jorat)